import scrapy
import json
import time
from mySpider.items import Weibo


class shujiaSpider(scrapy.Spider):
    """Scrape hot comments for one Weibo post (m.weibo.cn hotflow API).

    Yields one ``Weibo`` item per comment, carrying the comment text and
    the author's id, screen name, gender and follow count.
    """

    # Spider is launched by this name: ``scrapy crawl shujiaSpider``.
    name = "shujiaSpider"

    # Hotflow endpoint for the target post. Pagination is driven by the
    # ``max_id`` cursor appended below (NOTE(review): the original code
    # issued 6 identical requests, which Scrapy's dupe-filter collapses
    # into one; following ``max_id`` is the assumed intent — confirm).
    BASE_URL = (
        "https://m.weibo.cn/comments/hotflow?id=4438194663541187"
        "&mid=4438194663541187&max_id_type=0&display=0&retcode=6102"
    )

    # Cap on pages fetched, matching the original loop of 6 iterations.
    MAX_PAGES = 6

    def start_requests(self):
        """Yield the first-page request; parse() follows pagination."""
        yield scrapy.Request(
            url=self.BASE_URL,
            callback=self.parse,
            meta={"page": 1},
        )

    def parse(self, response):
        """Parse one API page: yield items, then request the next page.

        The payload shape is ``{"data": {"data": [comment, ...],
        "max_id": <cursor>}}``; a missing/empty "data" key (e.g. an
        error retcode) ends the crawl for this post.
        """
        jsonStr = json.loads(response.text)
        data = jsonStr.get("data")
        if not data:
            # Error payload or empty result — nothing to extract.
            self.logger.warning("no 'data' in response from %s", response.url)
            return

        for line in data.get("data", []):
            user = line["user"]

            item = Weibo()
            item["text"] = line["text"]
            item["u_id"] = user["id"]
            item["screen_name"] = user["screen_name"]
            item["gender"] = user["gender"]
            item["follow_count"] = user["follow_count"]

            # Hand the item to the pipeline.
            yield item

        # Follow the max_id pagination cursor, up to MAX_PAGES pages.
        # A falsy max_id (0 / missing) means the last page was reached.
        page = response.meta.get("page", 1)
        max_id = data.get("max_id")
        if max_id and page < self.MAX_PAGES:
            yield scrapy.Request(
                url=f"{self.BASE_URL}&max_id={max_id}",
                callback=self.parse,
                meta={"page": page + 1},
            )
